This document will explore the spatial analyses of the data
library(PramCurry)
## Loading required package: poppr
## Loading required package: adegenet
## Loading required package: ade4
## ==========================
## adegenet 1.4-2 is loaded
## ==========================
##
## - to start, type '?adegenet'
## - to browse adegenet website, type 'adegenetWeb()'
## - to post questions/comments: adegenet-forum@lists.r-forge.r-project.org
##
##
## This is poppr version 1.1.2.99.70. To get started, type package?poppr
library(poppr)
library(reshape2)
library(ggplot2)
data(ramdat)
data(pop_data)
options(stringsAsFactors = FALSE)
sessionInfo()
## R version 3.1.2 (2014-10-31)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggplot2_1.0.0 reshape2_1.4 PramCurry_1.0 poppr_1.1.2.99-70
## [5] adegenet_1.4-2 ade4_1.6-2 rgdal_0.8-16 sp_1.0-15
##
## loaded via a namespace (and not attached):
## [1] ape_3.1-4 assertthat_0.1 bitops_1.0-6
## [4] boot_1.3-11 caTools_1.17.1 colorspace_1.2-4
## [7] digest_0.6.4 dplyr_0.2 evaluate_0.5.5
## [10] fastmatch_1.0-4 formatR_1.0 ggmap_2.3
## [13] grid_3.1.2 gtable_0.1.2 htmltools_0.2.6
## [16] httpuv_1.3.2 igraph_0.7.1 knitr_1.6
## [19] lattice_0.20-29 mapproj_1.2-2 maps_2.3-9
## [22] MASS_7.3-35 Matrix_1.1-4 munsell_0.4.2
## [25] nlme_3.1-117 packrat_0.4.1-1 parallel_3.1.2
## [28] pegas_0.6 permute_0.8-3 phangorn_1.99-7
## [31] plyr_1.8.1 png_0.1-7 proto_0.3-10
## [34] Rcpp_0.11.3 RgoogleMaps_1.2.0.6 rjson_0.2.14
## [37] RJSONIO_1.3-0 rmarkdown_0.3.10 scales_0.2.4
## [40] shiny_0.10.1 stringr_0.6.2 tools_3.1.2
## [43] vegan_2.0-10 xtable_1.7-4 yaml_2.1.13
The purpose of a mantel test is to test the hypothesis that genetic distance is correlated with geographic distance. As we have defined this data by year and by watershed, we will perform the mantel test on 4 scales:
options(digits = 10)
newReps <- other(ramdat)$REPLEN
(newReps[3] <- 4) # Tetranucleotide repeat
## [1] 4
(newReps <- fix_replen(ramdat, newReps))
## PrMS6A1 Pr9C3A1 PrMS39A1 PrMS45A1 PrMS43A1
## 3.00000 2.00000 4.00001 4.00000 4.00000
bdist <- bruvo.dist(ramdat, replen = newReps)#other(ramdat)$REPLEN)
gdist <- dist(pop_data[, c("LAT", "LON")])
system.time(overall_mantel <- spatial_stats(ramdat,
xy = gdist,
hierarchy = NULL,
distance = bdist,
sample = 99999,
seed = 9001))
## user system elapsed
## 42.631 0.312 43.053
overall_mantel
## Monte-Carlo test
## Call: mantel.randtest(m1 = dist_xy, m2 = dist_gen, nrepet = sample)
##
## Observation: 0.5234581822
##
## Based on 99999 replicates
## Simulated p-value: 1e-05
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## 1.991809716e+01 -8.695085620e-06 6.906892877e-04
overall_lm <- spatial_stats(ramdat,
xy = gdist,
hierarchy = NULL,
distance = bdist,
stat = "lm")
summary(overall_lm)
##
## Call:
## lm(formula = dist_xy_vec ~ dist_gen_vec)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.31377179 -0.05563021 -0.01119471 0.05422089 0.27984652
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0337124914 0.0004386434 76.85626 < 2.22e-16 ***
## dist_gen_vec 0.6449916969 0.0028970938 222.63404 < 2.22e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08497729 on 131326 degrees of freedom
## Multiple R-squared: 0.2740085, Adjusted R-squared: 0.2740029
## F-statistic: 49565.92 on 1 and 131326 DF, p-value: < 2.2204e-16
noseb <- popsub(setpop(ramdat, ~ZONE2), blacklist = "HunterCr")
bdist.noseb <- bruvo.dist(noseb, replen = newReps)
gdist.noseb <- dist(other(noseb)$xy)
system.time(overall_mantel_noseb <- spatial_stats(noseb,
xy = gdist.noseb,
hierarchy = NULL,
distance = bdist.noseb,
sample = 99999,
seed = 9001))
## user system elapsed
## 25.778 0.182 26.014
overall_mantel_noseb
## Monte-Carlo test
## Call: mantel.randtest(m1 = dist_xy, m2 = dist_gen, nrepet = sample)
##
## Observation: 0.1747198509
##
## Based on 99999 replicates
## Simulated p-value: 1e-05
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## 5.4675869829040 -0.0000732971456 0.0010220153208
overall_lm_noseb <- spatial_stats(noseb,
xy = gdist.noseb,
hierarchy = NULL,
distance = bdist.noseb,
stat = "lm")
summary(overall_lm_noseb)
##
## Call:
## lm(formula = dist_xy_vec ~ dist_gen_vec)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.109238802 -0.040784800 -0.009252208 0.034635346 0.204460118
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0588800416 0.0002875805 204.74282 < 2.22e-16 ***
## dist_gen_vec 0.1236855136 0.0022077129 56.02427 < 2.22e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05130993 on 99679 degrees of freedom
## Multiple R-squared: 0.03052703, Adjusted R-squared: 0.0305173
## F-statistic: 3138.719 on 1 and 99679 DF, p-value: < 2.2204e-16
system.time(by_year <- spatial_stats(ramdat,
xy = gdist,
hierarchy = ~Pop,
distance = bdist,
sample = 99999,
seed = 9001))
## user system elapsed
## 7.775 0.388 8.176
by_year_lm <- spatial_stats(ramdat, xy = gdist, hierarchy = ~Pop, distance = bdist,
stat = "lm")
system.time(by_zone <- spatial_stats(ramdat,
xy = gdist,
hierarchy = ~ZONE2,
distance = bdist,
sample = 99999,
seed = 9001))
## user system elapsed
## 10.584 0.263 10.866
by_zone_lm <- spatial_stats(ramdat, xy = gdist, hierarchy = ~ZONE2,
distance = bdist, stat = "lm")
system.time(by_yearzone <- spatial_stats(ramdat,
xy = gdist,
hierarchy = ~ZONE2/Pop,
distance = bdist,
sample = 99999,
seed = 9001,
ncol = 5))
## user system elapsed
## 5.826 0.726 6.564
by_yearzone_lm <- spatial_stats(ramdat, xy = gdist, hierarchy = ~ZONE2/Pop,
distance = bdist, stat = "lm", ncol = 5)
get_summary <- function(x, value){
vapply(x, "[[", numeric(1), value)
}
split_name <- function(x){
strsplit(x, "_")[[1]]
}
resmat <- matrix(nrow = length(by_zone) + 1, ncol = length(by_year) + 1,
dimnames = list(Region = c(names(by_zone), "Pooled"),
Year = c(names(by_year), "Pooled"))
)
resmat.p <- resmat
resmat["Pooled", ] <- c(get_summary(by_year, "obs"), overall_mantel$obs)
resmat.p["Pooled", ] <- c(get_summary(by_year, "pvalue"), overall_mantel$pvalue)
resmat[, "Pooled"] <- c(get_summary(by_zone, "obs"), overall_mantel$obs)
resmat.p[, "Pooled"] <- c(get_summary(by_zone, "pvalue"), overall_mantel$pvalue)
yz.obs <- get_summary(by_yearzone, "obs")
yz.p <- get_summary(by_yearzone, "pvalue")
for (i in names(yz.obs)){
ndex <- split_name(i)
resmat[ndex[1], ndex[2]] <- yz.obs[i]
resmat.p[ndex[1], ndex[2]] <- yz.p[i]
}
charmat <- apply(resmat, 2, function(x) as.character(round(x, 2)))
charmat.p <- ifelse(resmat.p > 0.1, "",
ifelse(resmat.p > 0.05, ".",
ifelse(resmat.p > 0.01, "*",
ifelse(resmat.p > 0.001, "**", "***"))))
charmatnew <- charmat
charmatnew[] <- paste0(charmat, charmat.p)
charmatnew[] <- paste0(charmat, charmat.p)
charmatnew[grep("NANA", charmatnew)] <- NA
charmatnew[grep("NaN", charmatnew)] <- NaN
dimnames(charmatnew) <- dimnames(resmat)
charmatnew
## Year
## Region 2001 2002 2003 2004 2005 2006 2010
## JHallCr "0.06" "0.24." "0.14***" "0.28***" "NaN" NA NA
## NFChetHigh NA NA "NaN" NA NA NA NA
## Coast NA NA NA NA NA "NaN" "NaN"
## HunterCr NA NA NA NA NA NA NA
## Winchuck NA NA NA NA NA NA NA
## ChetcoMain NA NA NA NA NA NA NA
## PistolRSF NA NA NA NA NA NA NA
## Pooled "0.06" "0.24." "0.13***" "0.28***" "NaN" "NaN" "NaN"
## Year
## Region 2011 2012 2013 2014 Pooled
## JHallCr NA NA "0.18**" "NaN" "0.14***"
## NFChetHigh NA "0.68." "0.41***" "-0.23" "0.35***"
## Coast "NaN" "NaN" "0.55*" "-0.25" "0.13."
## HunterCr "0.06" NA NA NA "0.06"
## Winchuck NA "0.41**" "0.03" NA "0.11"
## ChetcoMain NA NA "0.53." "NaN" "0.63*"
## PistolRSF NA NA "0.94" NA "0.94"
## Pooled "0.87***" "0.59***" "0.15***" "0.14*" "0.52***"
write.table(charmatnew, file = "mantel_table.csv", sep = ",", row.names = TRUE,
col.names = NA, na = "-")